{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "4acb3450",
   "metadata": {},
   "source": [
    "## PPO"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "88e76a36",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-14T02:29:04.297228Z",
     "start_time": "2024-05-14T02:28:59.118143Z"
    }
   },
   "outputs": [],
   "source": [
    "import gym\n",
    "import numpy as np\n",
    "from IPython import display\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e96b8ce0",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-14T02:29:10.129592Z",
     "start_time": "2024-05-14T02:29:09.335045Z"
    }
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAR8AAAEXCAYAAACUBEAgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAdmElEQVR4nO3db3BUZZ4v8O/p/51O0pNOoI8tAeMlpXID3DE4CquCEmJZRMaau1euUC61sneHkaToBUoFX8DMizTD1OBoMUrpWGrNlNPuFMbhlsglXjHKZqbEQCTBHbyzRpNg2qAT+k/S6U66n/sCOWsngN2Q9HNO+/1UnRc550fya0J/OX2e8zxHEUIIEBHlmUl2A0T03cTwISIpGD5EJAXDh4ikYPgQkRQMHyKSguFDRFIwfIhICoYPEUnB8CEiKaSGzzPPPIOqqio4HA7U1tbivffek9kOEeWRtPB59dVX4ff78cQTT+DEiRO44447cO+996K3t1dWS0SUR4qsiaW33norbr75Zjz77LPavptuugn3338/AoHAZf9sOp3G559/jpKSEiiKMt2tElGWhBCIRqPw+XwwmS5/bmPJU08ZkskkOjo68Pjjj2fsr6+vR3t7+6T6RCKBRCKhfX3mzBnMmzdv2vskoivT19eHWbNmXbZGSvh8+eWXSKVS8Hq9Gfu9Xi9CodCk+kAggJ/+9KeT9vf19aG0tHTa+iSi3EQiEVRWVqKkpORba6WEzwUTPzIJIS76MWrbtm3YvHmz9vWFF1haWsrwIdKhbC6HSAmfiooKmM3mSWc5g4ODk86GAMBut8Nut+erPSLKAymjXTabDbW1tWhtbc3Y39raiiVLlshoiYjyTNrHrs2bN+Ohhx7CokWLsHjxYjz33HPo7e3Fhg0bZLVERHkkLXxWr16Nr776Cj/72c8wMDCAmpoaHDx4EHPmzJHVEhHlkbT7fK5GJBKB2+1GOBzmBWciHcnlvcm5XUQkBcOHiKRg+BCRFAwfIpKC4UNEUjB8iEgKhg8RScHwISIpGD5EJAXDh4ikYPgQkRQMHyKSguFDRFIwfIhICoYPEUnB8CEiKRg+RCQFw4eIpGD4EJEUDB8ikoLhQ0RSMHyISAqGDxFJwfAhIikYPkQkBcOHiKRg+BCRFAwfIpKC4UNEUjB8iEgKhg8RScHwISIpGD5EJAXDh4ikYPgQkRQ5h8+7776L++67Dz6fD4qi4PXXX884LoTAzp074fP54HQ6sWzZMpw6dSqjJpFIoKmpCRUVFXC5XFi1ahX6+/uv6oUQkbHkHD7Dw8NYuHAh9u7de9Hju3fvxp49e7B3714cO3YMqqpixYoViEajWo3f70dLSwuCwSCOHj2KWCyGhoYGpFKpK38lRGQs4ioAEC0tLdrX6XRaqKoqdu3ape0bHR0Vbrdb7Nu3TwghxLlz54TVahXBYFCrOXPmjDCZTOLQoUNZ/dxwOCwAiHA4fDXtE9EUy+W9OaXXfHp6ehAKhVBfX6/ts9vtWLp0Kdrb2wEAHR0dGBsby6jx+XyoqanRaiZKJBKIRCIZGxEZ25SGTygUAgB4vd6M/V6vVzsWCoVgs9lQVlZ2yZqJAoEA3G63tlVWVk5l20QkwbSMdimKkvG1EGLSvokuV7Nt2zaEw2Ft6+vrm7JeiUiOKQ0fVVUBYNIZzODgoHY2pKoqkskkhoaGLlkzkd1uR2lpacZGRMY2peFTVVUFVVXR2tqq7Usmk2hra8OSJUsAALW1tbBarRk1AwMD6O7u1mqIqPBZcv0DsVgMf/3rX7Wve3p60NnZCY/Hg9mzZ8Pv96O5uRnV1dWorq5Gc3MzioqKsGbNGgCA2+3G+vXrsWXLFpSXl8Pj8WDr1q2YP38+6urqpu6VEZG+5TqUduTIEQFg0rZu3TohxPnh9h07dghVVYXdbhd33nmn6Orqyvge8XhcNDY2Co/HI5xOp2hoaBC9vb1Z98ChdiJ9yuW9qQgh
hMTsuyKRSARutxvhcJjXf4h0JJf3Jud2EZEUDB8ikoLhQ0RSMHyISAqGDxFJwfAhIikYPkQkBcOHiKTIeXqFnnz44YcoLi6W3QYRfS0Wi2Vda+jwOXv2LEZGRmS3QURfGx4ezrrW0OFTV1fH6RVEOpLLKqO85kNEUjB8iEgKhg8RScHwISIpGD5EJAXDh4ikYPgQkRQMHyKSguFDRFIwfIhICoYPEUnB8CEiKRg+RCQFw4eIpGD4EJEUDB8ikoLhQ0RSMHyISAqGDxFJwfAhIikYPkQkBcOHiKRg+BCRFAwfIpIip/AJBAK45ZZbUFJSgpkzZ+L+++/H6dOnM2qEENi5cyd8Ph+cTieWLVuGU6dOZdQkEgk0NTWhoqICLpcLq1atQn9//9W/GiIyjJzCp62tDRs3bsSf//xntLa2Ynx8HPX19RmPSN29ezf27NmDvXv34tixY1BVFStWrEA0GtVq/H4/WlpaEAwGcfToUcRiMTQ0NCCVSk3dKyMifRNXYXBwUAAQbW1tQggh0um0UFVV7Nq1S6sZHR0Vbrdb7Nu3TwghxLlz54TVahXBYFCrOXPmjDCZTOLQoUNZ/dxwOCwAiHA4fDXtE9EUy+W9eVXXfMLhMADA4/EAAHp6ehAKhVBfX6/V2O12LF26FO3t7QCAjo4OjI2NZdT4fD7U1NRoNRMlEglEIpGMjYiM7YrDRwiBzZs34/bbb0dNTQ0AIBQKAQC8Xm9Grdfr1Y6FQiHYbDaUlZVdsmaiQCAAt9utbZWVlVfaNhHpxBWHT2NjI06ePInf//73k44pipLxtRBi0r6JLlezbds2hMNhbevr67vStolIJ64ofJqamnDgwAEcOXIEs2bN0varqgoAk85gBgcHtbMhVVWRTCYxNDR0yZqJ7HY7SktLMzYiMracwkcIgcbGRrz22mt4++23UVVVlXG8qqoKqqqitbVV25dMJtHW1oYlS5YAAGpra2G1WjNqBgYG0N3drdUQUeGz5FK8ceNGvPLKK/jjH/+IkpIS7QzH7XbD6XRCURT4/X40Nzejuroa1dXVaG5uRlFREdasWaPVrl+/Hlu2bEF5eTk8Hg+2bt2K+fPno66ubupfIRHpUy7DaAAuur344otaTTqdFjt27BCqqgq73S7uvPNO0dXVlfF94vG4aGxsFB6PRzidTtHQ0CB6e3uz7oND7UT6lMt7UxFCCHnRd2UikQjcbjfC4TCv/xDpSC7vTc7tIiIpGD5EJAXDh4ikYPgQkRQMHyKSguFDRFIwfIhICoYPEUnB8CEiKXKa20U0lYRIIZUKI52OQ1EsMJtLoSiOb11+hQoDw4fyTgiBsbEzOHt2H8Lh/41ksg8mkwtFRYswc+YjKCm5C4rCf5qFjr9hyishBBKJ/4dPP/1HDA//CefnJgOp1BDC4X7EYkfg8zVjxox/ZgAVOF7zobxKpcLo7W3C8HA7LgTPxONnzmxHOHwIBpzzTDlg+FBehcNvIBr9v5etSafD+OKLXyKdHr5sHRkbw4fyKhY7CuDbn882MnIcqRSfUlLIGD6UN5FUCifj8azrx/mxq6AxfChvIqkU3o8rF7nSM9kwHBhKT3tLJBHDh/LqDaxEBJdf4U4AOCjuwWfjxflpiqRg+FBe9aAKz+GfMQr7RY8LACfwfbyMtegd+/ZrQ2RcvJGC8qbEbMaNThf2D/93AMA6vIxyfAnT1x/EkrDhT7gNv8K/4HNRgb+Mjspsl6YZw4fyxq4o8FosGIcV/4oH8Ccsxm34M2ahH8Nw4UMsxIdYiDiKZLdKecDwobyxKApKzWYAgIAJvZiDXsy5ZL1Ado/aJmPiNR/KGzOAIlP2/+RG0umsRsbImBg+lDeKoiCXc5hwKgWOthcuhg/p1rnxcaR4o2HBYvhQXs2wWrM++zkRjyPB8ClYDB/Kq5udzqz/0Q2n00gzfAoWw4fyqsxiyem6DxUuhg/lVZnZnPXQuRAii/nvZFQMH8orWw5D7eMAoinGT6Fi
+JBujQuBCMOnYDF8KO+yveaTSKfRPzY2rb2QPAwfyqtysxnV9ovPaJ8oLgQ+SSSmuSOSheFDeeU0meCxcEohMXwoz2yKguIcLjoD4FMsChTDh/LKqihw5hA+w2nO7ipUOYXPs88+iwULFqC0tBSlpaVYvHgx3nzzTe24EAI7d+6Ez+eD0+nEsmXLcOrUqYzvkUgk0NTUhIqKCrhcLqxatQr9/f1T82pI9xRFyekf3bnxcc5sL1A5hc+sWbOwa9cufPDBB/jggw9w991344c//KEWMLt378aePXuwd+9eHDt2DKqqYsWKFYhGo9r38Pv9aGlpQTAYxNGjRxGLxdDQ0IAUh1TpIr7izPaCpYir/EDt8Xjwi1/8Ag8//DB8Ph/8fj8ee+wxAOfPcrxeL37+85/jxz/+McLhMGbMmIHf/va3WL16NQDg888/R2VlJQ4ePIh77rnnoj8jkUgg8Y1Rj0gkgsrKSoTDYZSWXn4xctKfpt5e7D17NqvaBU4n3r/xRthzvE5EckQiEbjd7qzem1f8G02lUggGgxgeHsbixYvR09ODUCiE+vp6rcZut2Pp0qVob28HAHR0dGBsbCyjxufzoaamRqu5mEAgALfbrW2VlZVX2jbpwH8ryn6Z1DgXFCtYOYdPV1cXiouLYbfbsWHDBrS0tGDevHkIhUIAAK/Xm1Hv9Xq1Y6FQCDabDWVlZZesuZht27YhHA5rW19fX65tk454zGZOLqXc13C+4YYb0NnZiXPnzmH//v1Yt24d2tratOMTJw1mswbvt9XY7XbYs7wxjfSvLIf7fATAZTUKVM5nPjabDXPnzsWiRYsQCASwcOFCPPXUU1BVFQAmncEMDg5qZ0OqqiKZTGJoaOiSNVT4chlqHxMCMQ63F6SrvoonhEAikUBVVRVUVUVra6t2LJlMoq2tDUuWLAEA1NbWwmq1ZtQMDAygu7tbqyH6piTDp2Dl9LFr+/btuPfee1FZWYloNIpgMIh33nkHhw4dgqIo8Pv9aG5uRnV1Naqrq9Hc3IyioiKsWbMGAOB2u7F+/Xps2bIF5eXl8Hg82Lp1K+bPn4+6urppeYFkbNFUCmeSSVzPj90FJ6fw+eKLL/DQQw9hYGAAbrcbCxYswKFDh7BixQoAwKOPPop4PI5HHnkEQ0NDuPXWW3H48GGUlJRo3+PJJ5+ExWLBAw88gHg8juXLl+Oll16C+evnOVHhc5vN8JjN+CqLe7tinNlesK76Ph8ZcrmXgPTnb+PjWPbxx+iKx7Oqf6WqCg96PNPcFU2FvNznQ3SlHCYTinJ8CqkB/4+kb8Hwobxz5Di5dIQXnAsSw4fyTgFgyuHM52/j49PXDEnD8CHd+5LhU5AYPiSFNYczn6OxGOd3FSCGD0lxR3Fx1rWjvNhckBg+JMVMruP8ncfwISlyWUReCMEFxQoQw4ekyGUR+YQQiHO4veAwfCjvsn1W+wWj6TQXki9ADB/Sva9SKQxwflfBYfiQFDMsFriz/OgVTqVwlvf6FByGD0mhWq18cul3HMOHpCgymeDgk0u/0xg+JEWRyQR7DheeOdpVeHjeS1KYFQXmb4SPgjRmYhA34DSKEcMQyvAX3Ihz+B4ETJxcWoAYPiTNhQ9RDozg77Eff48/YCbOwooxJGBHP2bhZfwDWlGPLzjaVXAYPiSNQ1FgRQL/C7/Bg/g9bPjPgHEggbn4DzyGn6MYw3gv9jAegyqxW5pqvOZDUpgA3OZy4Q68h9V4NSN4vqkYI9iAffgv4qP8NkjTjuFD0qgWYDX+FQ4kLlvnRgT/A3/IU1eULwwfkqbcYsIMnM2qtjgd4tIaBYbhQ9KU5vC4pN5kEiFedC4oDB+SQlEUCJjwKeZkVf9vY5VcTrXAMHxIGp+tCAdNaxFByWXrQvBiP37Ee30KDMOHpKlxOPCp5TY8j3/CMIouWvMVPHgS/4IeVOGtaDTPHdJ04n0+JI3dZAIUC/6AB/AlKrAWr+B6fAIHRhGDC/+Om/AS
/hEfYBEEFPQmkxBC5LweEOkTw4eksSgKbnO58NdEAq2oRzv+DrPQDyfiiKIEZ3AtRuHA+Sd9AcOpFMaEgI3hUxAYPiSNGcACp/PrrxQMoxinceMl6z9OJPC3VApqjrPhSZ/4WyRpFEVBcQ7D7Z8lkziXSk1jR5RPDB+S6gdFRVmvaCgARBk+BYPhQ1LNttmyXlQsLQQ+isenuSPKF4YPSWU3mVCS5UevFID3R0a4omGBYPiQVE6TCX/ncmVdH0ulwA9ehYHhQ1JZAFxrs2Vd/9HoKGK87lMQGD4klaIouNZqRbZjXh+PjvIBggXiqsInEAhAURT4/X5tnxACO3fuhM/ng9PpxLJly3Dq1KmMP5dIJNDU1ISKigq4XC6sWrUK/f39V9MKGdgtLtf5u52zkAYwwvApCFccPseOHcNzzz2HBQsWZOzfvXs39uzZg7179+LYsWNQVRUrVqxA9Bvzcvx+P1paWhAMBnH06FHEYjE0NDQgxdPp76RrrNas73YdEwKnR0entR/KjysKn1gshrVr1+L5559HWVmZtl8IgV/96ld44okn8KMf/Qg1NTV4+eWXMTIygldeeQUAEA6H8cILL+CXv/wl6urq8P3vfx+/+93v0NXVhbfeeuuiPy+RSCASiWRsVDisOdxsmBACxzjiVRCuKHw2btyIlStXoq6uLmN/T08PQqEQ6uvrtX12ux1Lly5Fe3s7AKCjowNjY2MZNT6fDzU1NVrNRIFAAG63W9sqKyuvpG3SqTKzGbcXF2ddP5xKgdFjfDmHTzAYxPHjxxEIBCYdC4VCAACv15ux3+v1asdCoRBsNlvGGdPEmom2bduGcDisbX19fbm2TTpmVRR4cphm8WE8jgTPfAwvp4mlfX192LRpEw4fPgyHw3HJuolLHmSzDMLlaux2O+x2ey6tkoEoioLZNhsUIKszmu54HIl0Gk5OMDW0nH57HR0dGBwcRG1tLSwWCywWC9ra2vD000/DYrFoZzwTz2AGBwe1Y6qqIplMYmho6JI19N2ztKQEliyXykgDXEy+AOQUPsuXL0dXVxc6Ozu1bdGiRVi7di06Oztx/fXXQ1VVtLa2an8mmUyira0NS5YsAQDU1tbCarVm1AwMDKC7u1uroe8ej8WCbFfpiafT+CRx+cftkP7l9LGrpKQENTU1GftcLhfKy8u1/X6/H83NzaiurkZ1dTWam5tRVFSENWvWAADcbjfWr1+PLVu2oLy8HB6PB1u3bsX8+fMnXcCm747vmc24zmbDx1mESjSdxvGRESzJ4SI16c+ULyb26KOPIh6P45FHHsHQ0BBuvfVWHD58GCUl/7lI+JNPPgmLxYIHHngA8Xgcy5cvx0svvQRzDhcdqbB4zGZUOxxZhQ9w/uyHS6oamyIMeMNEJBKB2+1GOBxGaWmp7HZoivzPTz7BqxOuBV7KhooK7J09G2aGj67k8t7kcAHpRi73+hyNxTBmvP836RsYPqQbNU5n1hedR9JpJBk+hsbwId1w5XDfTjSdRl8yOY3d0HRj+JBuqFYrZme5ts/Z8XF0cUlVQ2P4kG5cY7ViTg4LiyW+HvEiY2L4kG6YAdhzGL36D95oaGgMH9KVFTncOvF2NAouK2ZcDB/SlZsuM2F5olg6jRQ/dhkWw4d048ITTLO97X5ofByhsbFp7YmmD8OHdOUmhwPXZbl8Su/YGP7C6z6GxfAhXSm3WFCawxy/JEe8DIvhQ7piAuC1ZD/f+RPeaGhYDB/SFQXA3d9YAeHbHObDBAyL4UO6k8sTTGOpFIfbDYrhQ7qiKAp8VmvW87wGx8cxND4+zV3RdGD4kO7c4HCgPMvrPqdHR9HD6z6GxPAh3fme2Zz1NAsBcF0fg2L4kO6YcH6GezYEgM945mNIDB/SHYuiYEUOI16HwuFp7IamC8OHdEfB+UfpZCvGGw0NieFDuqMoCqrsdjizvO7z1fg4RtIccDcahg/p0kKnEyVZTrM4NTqKQQ63Gw7Dh3Sp2GzO
+rE44VSKZz4GxPAhXbIrCm52OrOqFULgDEe8DIfhQ7pkUxTMyzJ8xgH8H87xMpwpf1wy0VQwKQrcWVzzsSkKHIrCxyYbEMOHdOs2lwtORUH862H0IpMJpSYTZlitmOdw4L86HLjB4UC1w4G5WS5ARvrB8CHdutHhwF0lJaiy2zHf6cRcux1VdjtUqxVWRYEZ5+8J4lmPMTF8SLd8VisOzJ17PmDAkCk0DB/SLeXrsxsqTBztIiIpGD5EJAXDh4ikYPgQkRQMHyKSguFDRFIwfIhICkPe53Nh1boIJxMS6cqF92Q2K0saMnyi0SgAoLKyUnInRHQx0WgUbrf7sjWKMODit+l0GqdPn8a8efPQ19eH0tJS2S1lLRKJoLKykn3nCfvOLyEEotEofD4fTN/y4EdDnvmYTCZce+21AIDS0lJD/XIuYN/5xb7z59vOeC7gBWcikoLhQ0RSGDZ87HY7duzYAbvBFpFi3/nFvvXLkBecicj4DHvmQ0TGxvAhIikYPkQkBcOHiKRg+BCRFIYMn2eeeQZVVVVwOByora3Fe++9J7Wfd999F/fddx98Ph8URcHrr7+ecVwIgZ07d8Ln88HpdGLZsmU4depURk0ikUBTUxMqKirgcrmwatUq9Pf3T2vfgUAAt9xyC0pKSjBz5kzcf//9OH36tO57f/bZZ7FgwQLt7t/FixfjzTff1HXPFxMIBKAoCvx+v+F6nxLCYILBoLBareL5558XH330kdi0aZNwuVzis88+k9bTwYMHxRNPPCH2798vAIiWlpaM47t27RIlJSVi//79oqurS6xevVpcc801IhKJaDUbNmwQ1157rWhtbRXHjx8Xd911l1i4cKEYHx+ftr7vuece8eKLL4ru7m7R2dkpVq5cKWbPni1isZiuez9w4IB44403xOnTp8Xp06fF9u3bhdVqFd3d3brteaL3339fXHfddWLBggVi06ZN2n4j9D5VDBc+P/jBD8SGDRsy9t14443i8ccfl9RRponhk06nhaqqYteuXdq+0dFR4Xa7xb59+4QQQpw7d05YrVYRDAa1mjNnzgiTySQOHTqUt94HBwcFANHW1ma43svKysRvfvMbQ/QcjUZFdXW1aG1tFUuXLtXCxwi9TyVDfexKJpPo6OhAfX19xv76+nq0t7dL6uryenp6EAqFMnq22+1YunSp1nNHRwfGxsYyanw+H2pqavL6usLhMADA4/EYpvdUKoVgMIjh4WEsXrzYED1v3LgRK1euRF1dXcZ+I/Q+lQw1q/3LL79EKpWC1+vN2O/1ehEKhSR1dXkX+rpYz5999plWY7PZUFZWNqkmX69LCIHNmzfj9ttvR01NjdbXhT4m9iW7966uLixevBijo6MoLi5GS0sL5s2bp70B9dgzAASDQRw/fhzHjh2bdEzPf9/TwVDhc8HEx+YKIXT/KN0r6Tmfr6uxsREnT57E0aNHJx3TY+833HADOjs7ce7cOezfvx/r1q1DW1ubdlyPPff19WHTpk04fPgwHA7HJev02Pt0MNTHroqKCpjN5kkJPzg4OOl/C71QVRUALtuzqqpIJpMYGhq6ZM10ampqwoEDB3DkyBHMmjVL26/n3m02G+bOnYtFixYhEAhg4cKFeOqpp3Tdc0dHBwYHB1FbWwuLxQKLxYK2tjY8/fTTsFgs2s/WY+/TwVDhY7PZUFtbi9bW1oz9ra2tWLJkiaSuLq+qqgqqqmb0nEwm0dbWpvVcW1sLq9WaUTMwMIDu7u5pfV1CCDQ2NuK1117D22+/jaqqKsP0PpEQAolEQtc9L1++HF1dXejs7NS2RYsWYe3atejs7MT111+v296nhZzr3FfuwlD7Cy+8ID766CPh9/uFy+USn376qbSeotGoOHHihDhx4oQAIPbs2SNOnDihDf/v2rVLuN1u8dprr4muri7x4IMPXnT4dNasWeKtt94Sx48fF3ffffe0D5/+5Cc/EW63W7zzzjtiYGBA20ZGRrQaPfa+bds2
8e6774qenh5x8uRJsX37dmEymcThw4d12/OlfHO0y2i9Xy3DhY8QQvz6178Wc+bMETabTdx8883a0LAsR44cEQAmbevWrRNCnB9C3bFjh1BVVdjtdnHnnXeKrq6ujO8Rj8dFY2Oj8Hg8wul0ioaGBtHb2zutfV+sZwDixRdf1Gr02PvDDz+s/f5nzJghli9frgWPXnu+lInhY6TerxbX8yEiKQx1zYeICgfDh4ikYPgQkRQMHyKSguFDRFIwfIhICoYPEUnB8CEiKRg+RCQFw4eIpGD4EJEU/x8tpb7hxKwztgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 300x300 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create the Acrobot environment with rgb_array rendering so frames come back as image arrays.\n",
    "# NOTE: GymHelper is defined in the next cell, so that cell must be executed before this one.\n",
    "env = gym.make(\"Acrobot-v1\", render_mode=\"rgb_array\")\n",
    "env.reset()  # reset before GymHelper grabs its first frame via env.render()\n",
    "gym_helper = GymHelper(env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "389eefe7",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-14T02:29:07.719180Z",
     "start_time": "2024-05-14T02:29:07.712608Z"
    }
   },
   "outputs": [],
   "source": [
    "class GymHelper:\n",
    "    \"\"\"Inline renderer that redraws an environment's frame inside a notebook cell.\n",
    "\n",
    "    Args:\n",
    "        env: environment whose ``render()`` returns an image that ``imshow`` can draw.\n",
    "        figsize: size (in inches) of the matplotlib figure used for the frames.\n",
    "    \"\"\"\n",
    "    def __init__(self,env,figsize=(3,3)):\n",
    "        self.env=env\n",
    "        self.figsize=figsize\n",
    "        # Keep handles to our own figure/axes: relying on plt.gcf() later would show\n",
    "        # whichever figure happens to be current if another one is created in between.\n",
    "        self.fig=plt.figure(figsize=figsize)\n",
    "        self.ax=self.fig.gca()\n",
    "        self.img=self.ax.imshow(env.render())\n",
    "    def render(self,title=None):\n",
    "        \"\"\"Draw the environment's current frame, optionally with a title.\n",
    "\n",
    "        Args:\n",
    "            title: text shown above the frame; a falsy value leaves the title untouched.\n",
    "        \"\"\"\n",
    "        self.img.set_data(self.env.render())\n",
    "        # Set the title BEFORE displaying; doing it afterwards makes the title lag\n",
    "        # one frame behind the image it belongs to.\n",
    "        if title:\n",
    "            self.ax.set_title(title)\n",
    "        display.display(self.fig)\n",
    "        # wait=True postpones the clear until the next output arrives (no flicker).\n",
    "        display.clear_output(wait=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "ee3a2d54",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-14T02:29:14.743724Z",
     "start_time": "2024-05-14T02:29:11.686921Z"
    }
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "import torch.nn.functional as F\n",
    "from tqdm import *\n",
    "import collections\n",
    "import time\n",
    "import random\n",
    "import sys\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a8c5b71b",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-14T02:29:29.766415Z",
     "start_time": "2024-05-14T02:29:29.757891Z"
    }
   },
   "outputs": [],
   "source": [
    "#策略模型，给定状态生成各个动作的概率\n",
    "class Policymodel(nn.Module):\n",
    "    def __init__(self,input_dim,output_dim):\n",
    "        super(Policymodel,self).__init__()\n",
    "        self.input_dim=input_dim\n",
    "        self.output_dim=output_dim\n",
    "        self.fc=nn.Sequential(\n",
    "            nn.Linear(self.input_dim,128),\n",
    "            nn.ReLU(),\n",
    "            nn.Linear(128,128),\n",
    "            nn.ReLU(),\n",
    "            nn.Linear(128,self.output_dim),\n",
    "            nn.Softmax(dim=1)\n",
    "        )\n",
    "        #dueling networks\n",
    "    def forward(self,state):\n",
    "        action_prob=self.fc(state)\n",
    "        return action_prob\n",
    "#价值模型，给定状态的估计值\n",
    "class Valuemodel(nn.Module):\n",
    "    def __init__(self,input_dim):\n",
    "        super(Valuemodel,self).__init__()\n",
    "        self.input_dim=input_dim\n",
    "        #self.output_dim=output_dim\n",
    "        self.fc=nn.Sequential(\n",
    "            nn.Linear(self.input_dim,128),\n",
    "            nn.ReLU(),\n",
    "            nn.Linear(128,128),\n",
    "            nn.ReLU(),\n",
    "#             nn.Linear(128,self.output_dim),\n",
    "            nn.Linear(128,1)\n",
    "        )\n",
    "        #dueling networks\n",
    "    def forward(self,x):\n",
    "        value=self.fc(x)\n",
    "        return value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "251b52cc",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-22T01:55:27.635423Z",
     "start_time": "2024-05-22T01:55:27.621389Z"
    }
   },
   "outputs": [],
   "source": [
    "class PPO:\n",
    "    def __init__(self,env,lr=0.0001,gamma=0.99,lamda=0.95,eps=0.2,epochs=20):\n",
    "        self.env=env\n",
    "        self.lr=lr\n",
    "        self.gamma=gamma\n",
    "        self.lamda=lamda\n",
    "        self.eps=eps\n",
    "        self.epochs=epochs\n",
    "        #判断可用设备是CPU与GPU\n",
    "        self.device=torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "        #定义策略网络与价值网络\n",
    "        self.policy_model=Policymodel(env.observation_space.shape[0],env.action_space.n).to(self.device)\n",
    "        self.value_model=Valuemodel(env.observation_space.shape[0]).to(self.device)\n",
    "        self.policy_optimizer=torch.optim.Adam(self.policy_model.parameters(),lr=lr)\n",
    "        self.value_optimizer=torch.optim.Adam(self.value_model.parameters(),lr=lr)\n",
    "    def choose_action(self,state):\n",
    "        state=torch.FloatTensor(np.array([state])).to(self.device)\n",
    "        with torch.no_grad():\n",
    "            action_prob=self.policy_model(state)\n",
    "        c=torch.distributions.Categorical(action_prob)\n",
    "        action=c.sample()\n",
    "        return action\n",
    "    def calc_advantage(self,td_delta):\n",
    "        td_delta=td_delta.cpu().detach().numpy()\n",
    "        #初始化\n",
    "        advantage=0\n",
    "        advantage_list=[]\n",
    "        for r in td_delta[::-1]:\n",
    "            #将上一步的TDerror和上一步的优势加权为当前的优势\n",
    "            advantage+=r+self.gamma*self.lamda\n",
    "            #将优势值加到列表开头,最终得到顺序序列\n",
    "            advantage_list.insert(0,advantage)\n",
    "        return torch.FloatTensor(np.array(advantage_list)).to(self.device)\n",
    "    def update(self,batch):\n",
    "        states,actions,rewards,next_states,dones=zip(*batch)\n",
    "        states=torch.FloatTensor(np.array(states)).to(self.device)\n",
    "        actions=torch.FloatTensor(np.array(actions)).view(-1,1).to(self.device)\n",
    "        rewards=torch.FloatTensor(np.array(rewards)).view(-1,1).to(self.device)\n",
    "        next_states=torch.FloatTensor(np.array(next_states)).to(self.device)\n",
    "        dones=torch.FloatTensor(np.array(dones)).view(-1,1).to(self.device)\n",
    "        with torch.no_grad():\n",
    "            #计算就动作状态下的策略概率\n",
    "            old_action_prob=torch.log(self.policy_model(states).gather(1,actions))\n",
    "            #计算TD目标以及误差\n",
    "            tf_target=rewards+(1-dones)*self.gamma*self.value_model(next_states)\n",
    "            td_delta=td_target-self.value_model(states)\n",
    "        #优势估计\n",
    "        advantage=self.calc_advantage(td_delta)\n",
    "        for i in range(self.epochs):\n",
    "            #计算策略下的动作概率\n",
    "            actions=actions.type(torch.long).to(self.device)\n",
    "            action_prob=torch.log(self.policy_model(states).gather(1,actions))\n",
    "            #计算策略动作概率比\n",
    "            ratio=torch.exp(action_prob-old_action_prob)\n",
    "            #clip修剪\n",
    "            part1=ratio*advantage\n",
    "            part2=torch.clamp(ratio,1-self.clip_eps,1+self.clip_eps)*advantage\n",
    "            #计算策略损失\n",
    "            policy_loss=torch.min(part1,part2).mean()\n",
    "            #计算价值损失\n",
    "            value_loss=F.mse_loss(self.value_model(states),td_target).mean()\n",
    "            \n",
    "            #梯度清零,反向传播,参数更新\n",
    "            self.policy_optimizer.zero_grad()\n",
    "            self.value_optimizer.zero_grad()\n",
    "            policy_loss.backward()\n",
    "            value_loss.backward()\n",
    "            self.policy_optimizerl.step()\n",
    "            self.value_optimizer.step()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "06ee49bc",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-14T02:29:45.795733Z",
     "start_time": "2024-05-14T02:29:31.168653Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|                                                                                          | 0/500 [00:00<?, ?it/s]C:\\Users\\admin\\AppData\\Local\\Temp\\ipykernel_20924\\754833209.py:37: FutureWarning: The input object of type 'Tensor' is an array-like implementing one of the corresponding protocols (`__array__`, `__array_interface__` or `__array_struct__`); but not a sequence (or 0-D). In the future, this object will be coerced as if it was first converted using `np.array(obj)`. To retain the old behaviour, you have to either modify the type 'Tensor', or assign to an empty array created with `np.empty(correct_shape, dtype=object)`.\n",
      "  actions=torch.FloatTensor(np.array(actions)).view(-1,1).to(self.device)\n",
      "C:\\Users\\admin\\AppData\\Local\\Temp\\ipykernel_20924\\754833209.py:37: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
      "  actions=torch.FloatTensor(np.array(actions)).view(-1,1).to(self.device)\n",
      "  0%|                                                                                          | 0/500 [00:09<?, ?it/s]\n"
     ]
    },
    {
     "ename": "TypeError",
     "evalue": "can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_20924\\426193538.py\u001b[0m in \u001b[0;36m<cell line: 8>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     21\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     22\u001b[0m             \u001b[1;32mbreak\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 23\u001b[1;33m     \u001b[0magent\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbuffer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     24\u001b[0m     \u001b[0meps_rewards\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0meps_reward\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     25\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mepisode\u001b[0m \u001b[1;33m%\u001b[0m \u001b[1;36m40\u001b[0m\u001b[1;33m==\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_20924\\754833209.py\u001b[0m in \u001b[0;36mupdate\u001b[1;34m(self, batch)\u001b[0m\n\u001b[0;32m     35\u001b[0m         \u001b[0mstates\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mrewards\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mnext_states\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdones\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mzip\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0mbatch\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     36\u001b[0m         \u001b[0mstates\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mFloatTensor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstates\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdevice\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 37\u001b[1;33m         \u001b[0mactions\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mFloatTensor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mview\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdevice\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     38\u001b[0m         
\u001b[0mrewards\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mFloatTensor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrewards\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mview\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdevice\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     39\u001b[0m         \u001b[0mnext_states\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mFloatTensor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnext_states\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdevice\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mTypeError\u001b[0m: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool."
     ]
    }
   ],
   "source": [
    "max_episodes=500\n",
    "max_steps=200\n",
    "\n",
    "agent=PPO(env)\n",
    "eps_rewards=[]\n",
    "\n",
    "# On-policy training: collect one episode, then update the agent on exactly that episode.\n",
    "for episode in tqdm(range(max_episodes)):\n",
    "    state,_=env.reset()\n",
    "    eps_reward=0\n",
    "    buffer=[]\n",
    "    for step in range(max_steps):\n",
    "        # choose_action returns a one-element tensor; convert it to a plain int so the\n",
    "        # environment and the buffer never hold tensors (storing tensors made\n",
    "        # np.array(actions) produce an object array and agent.update raise TypeError).\n",
    "        action=int(agent.choose_action(state))\n",
    "        next_state,reward,terminated,truncated,info=env.step(action)\n",
    "        done=terminated or truncated\n",
    "        buffer.append((state,action,reward,next_state,done))\n",
    "        eps_reward+=reward\n",
    "        state=next_state\n",
    "        if done:\n",
    "            break\n",
    "    agent.update(buffer)\n",
    "    eps_rewards.append(eps_reward)\n",
    "    # Report the episode return every 40 episodes without breaking the progress bar.\n",
    "    if episode % 40==0:\n",
    "        tqdm.write(\"Episode\"+str(episode)+\":\"+str(eps_reward))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "2eb2af54",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-13T08:49:27.758545Z",
     "start_time": "2024-05-13T08:49:14.661867Z"
    }
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAR8AAAEnCAYAAACQfkeNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAfj0lEQVR4nO3dfXBU9dk38O9ZstlsQrJ3XnDXNQEDRCwGqAS1ZCwBCVFrpEyfqbRYpY88LSiJ5AHGgrYlak0oVlCHArcvRafedrWDUVoxJZYQZaJjDEQSvIfWpyEkkCXiHTYvJLth93r+QE7dJEA2ZPeczX4/M2fGnL1ycq1n98vvvCsiIiAiCjGD1g0QUWRi+BCRJhg+RKQJhg8RaYLhQ0SaYPgQkSYYPkSkCYYPEWmC4UNEmmD4kOqNN97ADTfcALPZDEVRsGjRIiiKMqxl7d+/H4qiYP/+/QH93rXXXov8/Pxh/c1AHDt2DIqi4JVXXgn636LBRWndAOnDl19+ifvuuw933HEHtm3bBpPJBLvdjnXr1g1reTNnzsRHH32EqVOnjnCnNFowfAgA8I9//AN9fX34yU9+gpycHHX++PHjh7W8hIQEfOc73xmp9mgU4mYX4ac//SluvfVWAMDixYuhKArmzp2L4uLiAZtdFzaLysvLMXPmTJjNZlx//fX4wx/+4Fc32GbXv/71L/zoRz+C3W6HyWSC1WrF/PnzUVdXN6Cnyy0fAJxOJ5YvX47U1FRER0cjPT0djz/+OM6dO+dXd/LkSdxzzz2Ij4+HxWLB4sWL4XQ6h/l/i0YKRz6EX/3qV7j55puxcuVKlJSUYN68eUhISMCbb745aP1nn32GNWvWYN26dbBarXjppZewbNkyTJ48GXPmzLno3/ne974Hr9eLTZs2Yfz48Th9+jSqq6tx5syZgJfvdDpx8803w2Aw4Ne//jUmTZqEjz76CL/5zW9w7Ngx7Ny5EwDQ09OD3NxcnDx5EqWlpbjuuuvw7rvvYvHixSPzP4+GT4hEpLKyUgDIn//8Z3Xehg0bpP9HZMKECRITEyNNTU3qvJ6eHklKSpLly5cPWF5lZaWIiJw+fVoAyLPPPnvJPoa6/OXLl8vYsWP96kREfve73wkAOXLkiIiIbN++XQDIO++841f3s5/9TADIzp07L9kPBQ83uyhg3/72t/32BcXExOC6665DU1PTRX8nKSkJkyZNwtNPP43Nmzfj0KFD8Pl8w17+X//6V8ybNw92ux3nzp1TpzvvvBMAUFVVBQCorKxEfHw8Fi5c6Pc3lixZEvgbpxHF8KGAJScnD5hnMpnQ09Nz0d9RFAV///vfcfvtt2PTpk2YOXMmxo0bh4cffhidnZ0BL//UqVP4y1/+AqPR6DfdcMMNAIDTp08DAL766itYrdYBy7PZbEN7sxQ03OdDITNhwgS8/PLLAM4fXXvzzTdRXFwMj8eDHTt2BLSslJQUTJ8+HU899dSgr9vtdgDng+yTTz4Z8Dp3OGuP4UOauO666/DLX/4Su3btwsGDBwP+/fz8fOzZsweTJk1CYmLiRevmzZuHN998E7t37/bb9Hr99deH1TeNHIYPhcThw4dRUFCAH/7wh8jIyEB0dDT27duHw4cPD+tExieeeAIVFRXIzs7Gww8/jClTpqC3txfHjh3Dnj17sGPHDqSmpuL+++/Hli1bcP/99+Opp55CRkYG9uzZg7/97W9BeJcUCIYPhYTNZsOkSZOwbds2NDc3Q1EUTJw4Ec888wwKCwsDXt7VV1+NTz/9FE8++SSefvpptLS0ID4+Hunp6bjjjjvU0VBsbCz27duHVatWYd26dVAUBXl5eXA4HMjOzh7pt0kBUET49AoiCj0e7SIiTTB8iEgTDB8i0oSm4bNt2zakp6cjJiYGWVlZ+PDDD7Vsh4hCSLPweeONN1BUVITHHnsMhw4dwne/+13ceeedOH78uFYtEVEIaXa065ZbbsHMmTOx
fft2dd63vvUtLFq0CKWlpVq0REQhpMl5Ph6PB7W1tQNOLsvLy0N1dfVlf9/n8+HkyZOIj48f9m0+iWjkiQg6Oztht9thMFx6w0qT8Dl9+jS8Xu+AC/6sVuug19y43W643W715xMnTvD2nEQ61tzcjNTU1EvWaHqGc/9Ri4gMOpIpLS3F448/PmB+c3MzEhISgtYfEQWmo6MDaWlpiI+Pv2ytJuGTkpKCMWPGDBjltLW1DXr7g/Xr12P16tXqzxfeYEJCAsOHSIeGsjtEk6Nd0dHRyMrKQkVFhd/8CxcK9mcymdSgYeAQjQ6abXatXr0a9913H2bNmoXZs2fjhRdewPHjx7FixQqtWiKiENIsfBYvXoyvvvoKTzzxBFpbW5GZmYk9e/ZgwoQJWrVERCEUlle1d3R0wGKxwOVycROMSEcC+W7y2i4i0gTDh4g0wfAhIk0wfIhIEwwfItIEw4eINMHwISJNMHyISBMMHyLSBMOHiDTB8CEiTTB8iEgTDB8i0gTDh4g0wfAhIk0wfIhIEwwfItIEw4eINMHwISJNMHyISBMMHyLSBMOHiDTB8CEiTTB8iEgTDB8i0gTDh4g0wfAhIk0wfIhIEwwfItIEw4eINMHwISJNMHyISBMMHyLSBMOHiDTB8CEiTQQcPh988AHuvvtu2O12KIqCt99+2+91EUFxcTHsdjvMZjPmzp2LI0eO+NW43W4UFhYiJSUFcXFxWLhwIVpaWq7ojRBReAk4fLq7uzFjxgxs3bp10Nc3bdqEzZs3Y+vWraipqYHNZsOCBQvQ2dmp1hQVFaGsrAwOhwMHDhxAV1cX8vPz4fV6h/9OiCi8yBUAIGVlZerPPp9PbDabbNy4UZ3X29srFotFduzYISIiZ86cEaPRKA6HQ605ceKEGAwGKS8vH9LfdblcAkBcLteVtE9EIyyQ7+aI7vNpbGyE0+lEXl6eOs9kMiEnJwfV1dUAgNraWvT19fnV2O12ZGZmqjVENPpFjeTCnE4nAMBqtfrNt1qtaGpqUmuio6ORmJg4oObC7/fndrvhdrvVnzs6OkaybSLSQFCOdimK4veziAyY19+lakpLS2GxWNQpLS1txHolIm2MaPjYbDYAGDCCaWtrU0dDNpsNHo8H7e3tF63pb/369XC5XOrU3Nw8km0TkQZGNHzS09Nhs9lQUVGhzvN4PKiqqkJ2djYAICsrC0aj0a+mtbUVDQ0Nak1/JpMJCQkJfhMRhbeA9/l0dXXhiy++UH9ubGxEXV0dkpKSMH78eBQVFaGkpAQZGRnIyMhASUkJYmNjsWTJEgCAxWLBsmXLsGbNGiQnJyMpKQlr167FtGnTkJubO3LvjIj0LdBDaZWVlQJgwLR06VIROX+4fcOGDWKz2cRkMsmcOXOkvr7ebxk9PT1SUFAgSUlJYjabJT8/X44fPz7kHnionUifAvluKiIiGmbfsHR0dMBiscDlcnETjEhHAvlu8touItIEw4eINMHwISJNMHyISBMMHyLSBMOHiDTB8CEiTTB8iEgTDB8i0sSI3s8n1D777DOMHTtW6zaI6GtdXV1Drg3r8Pnyyy9x9uxZrdsgoq91d3cPuTaswyc3N5fXdhHpSCB3GeU+HyLSBMOHiDTB8CEiTTB8iEgTDB8i0gTDh4g0wfAhIk0wfIhIEwwfItIEw4eINMHwISJNMHyISBMMHyLSBMOHiDTB8CEiTTB8iEgTDB8i0gTDh4g0wfAhIk0wfIhIEwwfItIEw4eINMHwISJNBBQ+paWluOmmmxAfH4+rrroKixYtwtGjR/1qRATFxcWw2+0wm82YO3cujhw54lfjdrtRWFiIlJQUxMXFYeHChWhpabnyd0NEYSOg8KmqqsLKlSvx8ccfo6KiAufOnUNeXp7fUwo3bdqEzZs3Y+vWraipqYHNZsOCBQvQ2dmp1hQVFaGsrAwOhwMHDhxAV1cX8vPz4fV6R+6dEZG+yRVoa2sTAFJVVSUiIj6fT2w2m2zcuFGt
6e3tFYvFIjt27BARkTNnzojRaBSHw6HWnDhxQgwGg5SXlw/p77pcLgEgLpfrStonohEWyHfzivb5uFwuAEBSUhIAoLGxEU6nE3l5eWqNyWRCTk4OqqurAQC1tbXo6+vzq7Hb7cjMzFRriGj0G/az2kUEq1evxq233orMzEwAgNPpBABYrVa/WqvViqamJrUmOjoaiYmJA2ou/H5/brcbbrdb/TmQ50ETkT4Ne+RTUFCAw4cP409/+tOA1xRF8ftZRAbM6+9SNaWlpbBYLOqUlpY23LaJSCeGFT6FhYXYvXs3KisrkZqaqs632WwAMGAE09bWpo6GbDYbPB4P2tvbL1rT3/r16+FyudSpubl5OG0TkY4EFD4igoKCArz11lvYt28f0tPT/V5PT0+HzWZDRUWFOs/j8aCqqgrZ2dkAgKysLBiNRr+a1tZWNDQ0qDX9mUwmJCQk+E1EFN4C2uezcuVKvP7663jnnXcQHx+vjnAsFgvMZjMURUFRURFKSkqQkZGBjIwMlJSUIDY2FkuWLFFrly1bhjVr1iA5ORlJSUlYu3Ytpk2bhtzc3JF/h0SkT4EcRgMw6LRz5061xufzyYYNG8Rms4nJZJI5c+ZIfX2933J6enqkoKBAkpKSxGw2S35+vhw/fnzIffBQO5E+BfLdVEREtIu+4eno6IDFYoHL5eImGJGOBPLd5LVdRKQJhg8RaYLhQ0SaYPgQkSYYPkSkCYYPEWmC4UNEmmD4EJEmGD5EpAmGDxFpYtg3EyO6UiJeeL0u+Hw9UJQojBmTAEWJuey9n2h0YPhQyIkI+vpO4Msvd8Dl+gs8nmYYDHGIjZ2Fq656CPHx86Ao/GiOdlzDFFIiArf7nzh27H+ju/sjnL8xAuD1tsPlakFXVyXs9hKMG/dzBtAox30+FFJerwvHjxeiu7saF4Kn/+snTjwKl6scYXjDBQoAw4dCyuV6F52df79kjc/nwqlTz8Dn675kHYU3hg+FjE8E7Z0fArj8wyHPnj0Ir5dPKRnNGD4UMif7+vAX1xmt2yCdYPhQyHT5fDjtMw+yp2egXpjh48dzVOPapZDp8fnwV3wPHbj07TUFQDluxxn8R0j6Im0wfChkzvp8+H+Sjhfwc/TCNGiNADiEG/Ff+AlHPqMcT6SgkDnr9cILA3bhfwEAluJVJOM0DF9viHkQjY/wHTyL/4sOpGjZKoUAw4dCpqmvD+dEcA5GvIl78BFm4zv4GKloQTfi8Blm4DPMQA9ikagovMxilGP4UMg0ud049/V/Cww4jgk4jgmD1t5gNmOsgZtdoxnXLulSgsGAKI58RjWGD+lSjMHAD+cox/VLumQ2GGDgyGdUY/hQSIjIkE4uvMCsKBgTtG5IDxg+FBICwBPAVepGgwEc94xuDB8KCcH5kwyHSgF4qH2UY/hQSPgQWPjQ6MfwoZDwiTB8yA/Dh0LCC6Ctr0/rNkhHGD4UEm6fD5/39g6pVgGQEsWT70c7hg/pzhgA081mrdugIGP4kC7F8rquUY9rmHRHURTEMXxGvYDW8Pbt2zF9+nQkJCQgISEBs2fPxnvvvae+LiIoLi6G3W6H2WzG3LlzceTIEb9luN1uFBYWIiUlBXFxcVi4cCFaWlpG5t3QqKCAI59IENAaTk1NxcaNG/Hpp5/i008/xW233Ybvf//7asBs2rQJmzdvxtatW1FTUwObzYYFCxags7NTXUZRURHKysrgcDhw4MABdHV1IT8/H17v5Z9oQOHLG+DlFQyf0U+RK3wyW1JSEp5++mk88MADsNvtKCoqwi9+8QsA50c5VqsVv/3tb7F8+XK4XC6MGzcOf/zjH7F48WIAwMmTJ5GWloY9e/bg9ttvH9Lf7OjogMVigcvlQkLCpe8HTPrQ7PFg5n//N06fO3fZ2mhFweGpUzElJiYEndFICuS7Oex/XrxeLxwOB7q7uzF79mw0NjbC6XQiLy9PrTGZTMjJyUF1dTUAoLa2Fn19fX41drsdmZmZas1g
3G43Ojo6/CYKLz0+H3x8Ail9Q8DhU19fj7Fjx8JkMmHFihUoKyvD1KlT4XQ6AQBWq9Wv3mq1qq85nU5ER0cjMTHxojWDKS0thcViUae0tLRA2yaN9fh84PnN9E0Bh8+UKVNQV1eHjz/+GA8++CCWLl2Kzz//XH29/8WAInLZCwQvV7N+/Xq4XC51am5uDrRt0liPzwcvRz70DQGHT3R0NCZPnoxZs2ahtLQUM2bMwHPPPQebzQYAA0YwbW1t6mjIZrPB4/Ggvb39ojWDMZlM6hG2CxOFl3+63egZ4rVdMYrCW6hGgCs+pCAicLvdSE9Ph81mQ0VFhfqax+NBVVUVsrOzAQBZWVkwGo1+Na2trWhoaFBraHT6n3PncPldzedNNpmQOIa3EhvtArqA5tFHH8Wdd96JtLQ0dHZ2wuFwYP/+/SgvL4eiKCgqKkJJSQkyMjKQkZGBkpISxMbGYsmSJQAAi8WCZcuWYc2aNUhOTkZSUhLWrl2LadOmITc3NyhvkMJPDG8eHxECCp9Tp07hvvvuQ2trKywWC6ZPn47y8nIsWLAAAPDII4+gp6cHDz30ENrb23HLLbdg7969iI+PV5exZcsWREVF4Z577kFPTw/mz5+PV155BWP4Lx19LcZg4C1UI8AVn+ejBZ7nE36eO3UKRUM8k/3OhASUTZoEE080DDshOc+HaKgCvXk8N7siA8OHQqI3gLsYGhWFH8wIwHVMQScAunnzeOqH4UNBF+iTKygyMHwoJAIZ+VBkYPhQ0PkAdPCWKdQPw4eCziuCg2fPDrk+3WQKYjekFwwfCjoBhnxphQJgUnR0ELshvWD4kO7E8Wz3iMDwId3hzeMjA9cy6Q7DJzJwLVPQeUUCuoUqwycycC1T0PWKIJAD7byuKzIwfCjoenkLVRoEw4eCzu3z4RzDh/ph+FDQ9Yhw5EMDBHQnQ6LhONrbi3avF4DABDcm4wukogUCBU2YgH9hIvpgBHD+Vhrc4xMZGD4UdL0+H7wQTMYXWIEdyEIt4tANgYJOjMVHyMZ/4udoQRomRJtgMxq1bplCgOFDQacAmIwv8CR+hcn44hsjG8F/oAN3oBx2nMCv8SRiDZMRw0PtEYFrmYLOBDdW4dl+wfNvCoDpqMdD2IZYgw9GHmqPCAwfCrobcQgzceiS+3IUAN/Fh8hU/snwiRAMHwoqEYEFZ2CC57K1ZvQgWenmSYYRguFDumJUFO6IjBAMHwq6k7CiG7GXrXPBgv9Bcgg6Ij1g+FDQHZYbcAC3wneJvT4+KPgb8vAvTAxhZ6Qlhg8FnRtGbEUBDuLGQR8e6IOCD/FdvIz/A+EphhGDm9cUdD4RtOJqbMATWIpXkYMqJOMrCBS0YRwqkIf/wr1oR6LWrVIIMXwo6M4/NEfBKVjxO6zFH3EfUvDl1+FzFb7EVV+PeDjqiSQMHwq6f9/LR4EPY9AKO1ph17Aj0gPu86GgC+QuhhQ5GD4UdHxWKQ2G4UNBx5EPDYbhQ0HHkQ8NhuFDQce7GNJgGD4UdBz50GCuKHxKS0uhKAqKiorUeSKC4uJi2O12mM1mzJ07F0eOHPH7PbfbjcLCQqSkpCAuLg4LFy5ES0vLlbRCOsZ9PjSYYYdPTU0NXnjhBUyfPt1v/qZNm7B582Zs3boVNTU1sNlsWLBgATo7O9WaoqIilJWVweFw4MCBA+jq6kJ+fj683kCe7kThgiMfGsywwqerqwv33nsvXnzxRSQm/vuUeBHBs88+i8ceeww/+MEPkJmZiVdffRVnz57F66+/DgBwuVx4+eWX8cwzzyA3Nxc33ngjXnvtNdTX1+P9998fmXdFusLwocEMK3xWrlyJu+66C7m5uX7zGxsb4XQ6kZeXp84zmUzIyclBdXU1AKC2thZ9fX1+NXa7HZmZmWpNf263Gx0dHX4ThQ/ucKbBBHx5hcPhwMGDB1FTUzPgNafTCQCwWq1+861WK5qamtSa6OhovxHThZoLv99faWkpHn/88UBbJZ3g
yIcGE9DIp7m5GatWrcJrr72GmJiYi9Yp/W6DKSID5vV3qZr169fD5XKpU3NzcyBtk8a4w5kGE1D41NbWoq2tDVlZWYiKikJUVBSqqqrw/PPPIyoqSh3x9B/BtLW1qa/ZbDZ4PB60t7dftKY/k8mEhIQEv4nCB0c+NJiAwmf+/Pmor69HXV2dOs2aNQv33nsv6urqMHHiRNhsNlRUVKi/4/F4UFVVhezsbABAVlYWjEajX01raysaGhrUGhpdOPKhwQS0zyc+Ph6ZmZl+8+Li4pCcnKzOLyoqQklJCTIyMpCRkYGSkhLExsZiyZIlAACLxYJly5ZhzZo1SE5ORlJSEtauXYtp06YN2IFN4U/AkQ8NbsTv5/PII4+gp6cHDz30ENrb23HLLbdg7969iI+PV2u2bNmCqKgo3HPPPejp6cH8+fPxyiuvYMyYMSPdDukAj3bRYBSR8PtkdHR0wGKxwOVycf+PzvlEsLalBVva2oZUvyA+Hn/LyLjsAQrSp0C+m7y2i4KOm100GIYPBV0gO5zHcMQTMRg+FHRNnss/KvmCrNjLP1yQRgeGDwWVAGgOIHxiDPxIRgquadIVHu+MHAwf0hV+ICMH1zXpioE7nCMGw4d0hR/IyMF1TbrCQ+2Rg+FDusIPZOTguiZd4T6fyMHwIV3hBzJycF2TrnCfT+Rg+JCu8AMZObiuSVe4zydyMHxIV/iBjBxc16QrHPlEDoYP6QovLI0cDB/SFX4gIwfXNekKN7siB8OHdIUfyMjBdU26wpMMIwfDh3SFH8jIwXVNusJ9PpGD4UO6wkPtkYPhQ7rCkU/kYPiQrvADGTm4rklXOPKJHFFaN0Cj35SYGAz1gckWPjQwYjB8KKgMAF5LTw+oXuHoJyIwfCioFEXhESwaFMe4RKQJhg8RaYLhQ0SaYPgQkSYYPkSkCYYPEWmC4UNEmgjL83xEzp8v29HRoXEnRPRNF76TF76jlxKW4dPZ2QkASEtL07gTIhpMZ2cnLBbLJWsUGUpE6YzP58PRo0cxdepUNDc3IyEhQeuWhqyjowNpaWnsO0TYd2iJCDo7O2G322G4zHV6YTnyMRgMuOaaawAACQkJYbVyLmDfocW+Q+dyI54LuMOZiDTB8CEiTYRt+JhMJmzYsAEmk0nrVgLCvkOLfetXWO5wJqLwF7YjHyIKbwwfItIEw4eINMHwISJNhGX4bNu2Denp6YiJiUFWVhY+/PBDTfv54IMPcPfdd8Nut0NRFLz99tt+r4sIiouLYbfbYTabMXfuXBw5csSvxu12o7CwECkpKYiLi8PChQvR0tIS1L5LS0tx0003IT4+HldddRUWLVqEo0eP6r737du3Y/r06eoJeLNnz8Z7772n654HU1paCkVRUFRUFHa9jwgJMw6HQ4xGo7z44ovy+eefy6pVqyQuLk6ampo062nPnj3y2GOPya5duwSAlJWV+b2+ceNGiY+Pl127dkl9fb0sXrxYrr76auno6FBrVqxYIddcc41UVFTIwYMHZd68eTJjxgw5d+5c0Pq+/fbbZefOndLQ0CB1dXVy1113yfjx46Wrq0vXve/evVveffddOXr0qBw9elQeffRRMRqN0tDQoNue+/vkk0/k2muvlenTp8uqVavU+eHQ+0gJu/C5+eabZcWKFX7zrr/+elm3bp1GHfnrHz4+n09sNpts3LhRndfb2ysWi0V27NghIiJnzpwRo9EoDodDrTlx4oQYDAYpLy8PWe9tbW0CQKqqqsKu98TERHnppZfCoufOzk7JyMiQiooKycnJUcMnHHofSWG12eXxeFBbW4u8vDy/+Xl5eaiurtaoq0trbGyE0+n069lkMiEnJ0ftuba2Fn19fX41drsdmZmZIX1fLpcLAJCUlBQ2vXu9XjgcDnR3d2P27Nlh0fPKlStx1113ITc3129+OPQ+ksLqwtLTp0/D6/XCarX6zbdarXA6nRp1dWkX+hqs56amJrUmOjoaiYmJA2pC9b5EBKtX
r8att96KzMxMta8LffTvS+ve6+vrMXv2bPT29mLs2LEoKyvD1KlT1S+gHnsGAIfDgYMHD6KmpmbAa3r+/x0MYRU+F/R/oqWI6P4pl8PpOZTvq6CgAIcPH8aBAwcGvKbH3qdMmYK6ujqcOXMGu3btwtKlS1FVVaW+rseem5ubsWrVKuzduxcxMTEXrdNj78EQVptdKSkpGDNmzICEb2trG/CvhV7YbDYAuGTPNpsNHo8H7e3tF60JpsLCQuzevRuVlZVITU1V5+u59+joaEyePBmzZs1CaWkpZsyYgeeee07XPdfW1qKtrQ1ZWVmIiopCVFQUqqqq8PzzzyMqKkr923rsPRjCKnyio6ORlZWFiooKv/kVFRXIzs7WqKtLS09Ph81m8+vZ4/GgqqpK7TkrKwtGo9GvprW1FQ0NDUF9XyKCgoICvPXWW9i3bx/S+z1TXc+99ycicLvduu55/vz5qK+vR11dnTrNmjUL9957L+rq6jBx4kTd9h4U2uznHr4Lh9pffvll+fzzz6WoqEji4uLk2LFjmvXU2dkphw4dkkOHDgkA2bx5sxw6dEg9/L9x40axWCzy1ltvSX19vfz4xz8e9PBpamqqvP/++3Lw4EG57bbbgn749MEHHxSLxSL79++X1tZWdTp79qxao8fe169fLx988IE0NjbK4cOH5dFHHxWDwSB79+7Vbc8X882jXeHW+5UKu/AREfn9738vEyZMkOjoaJk5c6Z6aFgrlZWVAmDAtHTpUhE5fwh1w4YNYrPZxGQyyZw5c6S+vt5vGT09PVJQUCBJSUliNpslPz9fjh8/HtS+B+sZgOzcuVOt0WPvDzzwgLr+x40bJ/Pnz1eDR689X0z/8Amn3q8Ub6lBRJoIq30+RDR6MHyISBMMHyLSBMOHiDTB8CEiTTB8iEgTDB8i0gTDh4g0wfAhIk0wfIhIEwwfItIEw4eINPH/AbGhR7jjpX5bAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 300x300 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Demo rollout: reset the environment, build a new PPO agent, and step it for a\n",
    "# bounded number of iterations while drawing each step via gym_helper.render.\n",
    "MAX_STEPS=20      # upper bound on the number of environment steps shown\n",
    "FRAME_DELAY=0.5   # value passed to time.sleep after every step to slow the animation\n",
    "try:\n",
    "    observation,_=env.reset()\n",
    "    gym_helper=GymHelper(env,figsize=(3,3))\n",
    "    agent=PPO(env)\n",
    "    for i in range(MAX_STEPS):\n",
    "        gym_helper.render(title=str(i))\n",
    "        action=agent.choose_action(observation)\n",
    "        observation,reward,terminated,truncated,info=env.step(action)\n",
    "        # Stop as soon as the environment reports termination or truncation.\n",
    "        done=terminated or truncated\n",
    "        time.sleep(FRAME_DELAY)\n",
    "        if done:\n",
    "            break\n",
    "    gym_helper.render(title=\"finished\")\n",
    "finally:\n",
    "    # Always release the environment, even if rendering, action selection or\n",
    "    # stepping raised part-way through the rollout.\n",
    "    env.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "372dc1a7",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
